********************************************************************************
*DYNAMIC IMPACTS OF PRICING GROUNDWATER
*Bruno, Jessoe, Hanemann in JAERE
*********************************************************************************
*QUALITY DATA PROCESSING PRIOR TO SPATIAL MANIPULATION IN ARCPY
********************************************************************************

clear all
capture log close
set more off

*SELECT OUTPUT DATE
global outputdate = "20220729"		 

*SET DIRECTORY
cd  "D:\Ellen\Dropbox\Pajaro_AgInnovation" 

*Load modified originals (adding =month, =day, =year, =numbervalue of Sitenum stored as text in excel, saved as csv)
*append 2018-2020
*FINd A WAY TOKEEP TURN OUT MEASUREMENTS; DON"T DROP STRINGSS

import delimited "D:\Ellen\Dropbox\Pajaro_AgInnovation\Data\Chloride_preprocessing_historicalto2017_20210601.csv"
*THIS DATASET IS A SUBSET FROM RAW FILE PV_Water_Groundwater_Water_Quality_Data.xls with =month() =day() =year() columns created manually before saving as csv
keep if constituent == "Chloride"
gen quarter=0
replace quarter =1 if month ==12 | month==1 | month==2
replace quarter =2 if month ==3 | month==4 | month==5
replace quarter =3 if month ==6 | month==7 | month==8
replace quarter =4 if month ==9 | month==10 | month==11
gen date = mdy(month, day, year)
format date %td
rename value_number chloride 
drop lab_sample datesampled timesampled fieldtech dateanalyzed constituent value reportdate pql resultqualifier
drop labname  method notes 
save "D:\Ellen\Dropbox\Pajaro_AgInnovation\Data\Chloride_preprocessing_historicalto2017_$outputdate.dta", replace

clear
import delimited "D:\Ellen\Dropbox\Pajaro_AgInnovation\Data\Chloride_preprocessing_2018to2020_20210527.csv"
keep if constituent == "Chloride"
gen quarter=0
replace quarter =1 if month ==12 | month==1 | month==2
replace quarter =2 if month ==3 | month==4 | month==5
replace quarter =3 if month ==6 | month==7 | month==8
replace quarter =4 if month ==9 | month==10 | month==11
gen date = mdy(month, day, year)
format date %td
rename value_number chloride 
drop lab_sample datesampled timesampled fieldtech dateanalyzed constituent value reportdate pql resultqualifier
drop labname  method notes
replace sitenum =" " if sitenum == "#VALUE!"
destring sitenum, replace

append using "D:\Ellen\Dropbox\Pajaro_AgInnovation\Data\Chloride_preprocessing_historicalto2017_$outputdate.dta"
collapse (mean) chloride, by (sitenum sitename year quarter)
drop if year<2001
*drop if sitenum ==.
bys year: sum chloride

save "D:\Ellen\Dropbox\Pajaro_AgInnovation\Data\Chloride_preprocess_$outputdate.dta", replace

*CHLORIDE PROCESSING
local myyear 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 
local myquarter 1 2 3 4 

foreach x of local myyear {
	foreach y of local myquarter {
	use "D:\Ellen\Dropbox\Pajaro_AgInnovation\Data\Chloride_preprocess_$outputdate.dta"
	
	gsort -sitename
	
	keep if year == `x' 
	keep if quarter == `y'
	sum chloride
	
	save "D:\Ellen\Dropbox\Pajaro_AgInnovation\Data\CL_tables\Replicate\CL_`x'_Q`y'_$outputdate.dta", replace
	outsheet sitenum sitename quarter year chloride using "D:\Ellen\Dropbox\Pajaro_AgInnovation\Data\CL_tables\Replicate\CL_`x'_Q`y'_$outputdate.csv", comma replace 
}
}


